
# =============================================================
# File: App_F_hq.R
# Purpose: Produces results from Appendix Section F: Headquarters Operations
# Paper: Foreignness as an Asset: European Carbon Regulation and the Relocation Threat among Multinational Firms
# Author: Patrick Bayer, patrick.bayer@strath.ac.uk
# Date: 5 October 2022
#
# Data: ./data.csv
#
# Technical disclaimer:
# All analyses in R version 4.1.2 (2021-11-01)
# RStudio 2022.07.1 Build 554 ("Spotted Wakerobin" Release (7872775e, 2022-07-22) for Windows)
# Windows 10 Enterprise, 64-bit
# 12th Gen Intel(R) Core(TM) i7-1270P 2.20 GHz with 32GB RAM
# =============================================================

library(lmtest)
library(sandwich)
library(MASS)
library(MatchIt)
library(cem)
library(cobalt)
library(cowplot)
library(ggpubr)
library(marginaleffects)
library(margins)
library(AER)
library(tidyverse)

# Read the plant-level data set
df <- read_csv("./data.csv")

# Restrict to the true within-firm sample:
# (1) European MNCs only, as there is no allocation data for domestically-owned
#     operations of non-EU MNCs
# (2) MNCs with at least some domestically-owned operations, as firms with only
#     foreign-owned operations offer no within-MNC variation
# NOTE: later code flags HQ plants via row.names(df) after this subset, so the
# row filter (including how it treats NA) must not change.
df$sample <- as.numeric(df$mnc.eu == 1 & df$foreign.share < 1)
df <- df[df$sample == 1, ]

# Treat the foreign-ownership indicator as categorical
df$foreign <- as.factor(df$foreign)

# =============================================================
# APPENDIX F:  Headquarters operations
# =============================================================

# Two tests address the concern that the presence of company headquarters among domestic plants biases the results
# (1) Replication of main results for Combustion sector
# (2) Replication of main results when excluding plants in same city as owner

# =============================================================
# Model (1): Combustion sector
# =============================================================

# Baseline specification on Combustion-sector plants only, with country (iso2)
# and firm (BVD) entered as factors
m1 <- lm(
  logDV ~ foreign + logAlloc0 + logEmit0 + as.factor(iso2) + as.factor(BVD),
  data = df[df$ETSsector == "Combustion", ]
)
summary(m1)

# Sample size, then number of country and firm levels used in the fit
nobs(m1)
length(m1$xlevels[["as.factor(iso2)"]])
length(m1$xlevels[["as.factor(BVD)"]])

# Effect of foreign == 1 expressed in percent: 100 * (exp(beta) - 1)
100 * (exp(coef(summary(m1))[["foreign1", "Estimate"]]) - 1)

# Confidence interval for that percentage effect (coefci's default level),
# computed with the HC0 heteroskedasticity-robust covariance matrix
100 * (exp(coefci(m1, vcov. = vcovHC, type = "HC0")["foreign1", ]) - 1)

# =============================================================
# Model (3): Combustion sector
# =============================================================

# Exact matching on 4-digit NACE code and firm
df$NACE[which(df$NACE == "")] <- NA # empty strings become NA

# Match foreign vs. non-foreign plants exactly on NACE and BVD, using only
# Combustion-sector rows with a non-missing NACE code
m <- matchit(
  foreign ~ NACE + BVD,
  data = df[!is.na(df$NACE) & df$ETSsector == "Combustion", ],
  method = "exact"
)
df.match <- match.data(m)

# Re-estimate the main model on the matched sample, applying the matching
# weights returned by match.data()
m3 <- lm(
  logDV ~ foreign + logAlloc0 + logEmit0 + as.factor(iso2) + as.factor(BVD),
  data = df.match[df.match$ETSsector == "Combustion", ],
  weights = weights
)
summary(m3)

# Sample size, then number of country and firm levels used in the fit
nobs(m3)
length(m3$xlevels[["as.factor(iso2)"]])
length(m3$xlevels[["as.factor(BVD)"]])

# Effect of foreign == 1 expressed in percent: 100 * (exp(beta) - 1)
100 * (exp(coef(summary(m3))[["foreign1", "Estimate"]]) - 1)

# Confidence interval for that percentage effect (coefci's default level),
# computed with the HC0 heteroskedasticity-robust covariance matrix
100 * (exp(coefci(m3, vcov. = vcovHC, type = "HC0")["foreign1", ]) - 1)


# =============================================================
# Excluding potential HQ sites
# =============================================================

# Lower-case the city names of plants and of owners so both use the same case
df$city_std <- tolower(df$city)
df$citybvd_std <- tolower(df$cityBVD)

# Row names of plants that, on manual inspection, share the SAME city and
# postcode information as their owner
hq.list <- c(
  "264", "354", "465", "1034", "1080", "1087", "1088", "1089", "1090",
  "1101", "1104", "1106", "1107", "1219", "1221", "1262", "2184", "2357",
  "2361", "2577", "2582", "2625", "2632", "2683", "2823"
)

# HQ identifier: 1 for rows listed in hq.list, 0 otherwise ...
df$hq <- as.numeric(row.names(df) %in% hq.list)
# ... and NA when neither a postcode pair nor a city pair is available,
# i.e. a postcode is missing on either side AND a city is missing on either side
df$hq[(is.na(df$zipcode) | is.na(df$zipcodeBVD)) &
        (is.na(df$city_std) | is.na(df$citybvd_std))] <- NA

# Manual fixes to data
# An HQ must be in the plant's country: undetermined rows whose plant and owner
# countries differ are set to 0
df$hq[is.na(df$hq) & df$iso2 != df$owner.iso] <- 0
df$hq[row.names(df) == "1287"] <- NA # city name entry coded as "xxx"
df$hq[row.names(df) == "2120"] <- NA # city "name" entry coded as "not applicable"

# Distribution of the HQ identifier, including missing values
table(df$hq, useNA = "ifany")

# # Easy way to compare HQ coding variable
# data.check <- df[,c("zipcode", "zipcodeBVD", "city_std", "citybvd_std", "hq", "iso2", "owner.iso")]
# view(data.check)


# Firms (BVD) owning at least one plant flagged as a potential HQ site;
# rows with a missing HQ flag do not contribute
firm.hq <- names(table(df$BVD[which(df$hq == 1)]))
length(firm.hq)

# Data set that drops every plant belonging to one of those firms
wohq <- df[!df$BVD %in% firm.hq, ]



# =============================================================
# Model (1): W/o potential HQ site
# =============================================================

# Baseline specification on the sample without firms that have a potential
# HQ site (wohq)
m1 <- lm(
  logDV ~ foreign + logAlloc0 + logEmit0 + as.factor(iso2) + as.factor(BVD),
  data = wohq
)
summary(m1)


# Sample size, then number of country and firm levels used in the fit
nobs(m1)
length(m1$xlevels[["as.factor(iso2)"]])
length(m1$xlevels[["as.factor(BVD)"]])

# Effect of foreign == 1 expressed in percent: 100 * (exp(beta) - 1)
100 * (exp(coef(summary(m1))[["foreign1", "Estimate"]]) - 1)

# Confidence interval for that percentage effect (coefci's default level),
# computed with the HC0 heteroskedasticity-robust covariance matrix
100 * (exp(coefci(m1, vcov. = vcovHC, type = "HC0")["foreign1", ]) - 1)


# =============================================================
# Model (2): W/o potential HQ site
# =============================================================

# Specification with additional sector (ETSsector) factor, estimated on the
# sample without firms that have a potential HQ site (wohq)
m2 <- lm(
  logDV ~ foreign + logAlloc0 + logEmit0 + as.factor(iso2) +
    as.factor(ETSsector) + as.factor(BVD),
  data = wohq
)

# Sample size, then number of sector, country and firm levels used in the fit
nobs(m2)
length(m2$xlevels[["as.factor(ETSsector)"]])
length(m2$xlevels[["as.factor(iso2)"]])
length(m2$xlevels[["as.factor(BVD)"]])

# Effect of foreign == 1 expressed in percent: 100 * (exp(beta) - 1)
100 * (exp(coef(summary(m2))[["foreign1", "Estimate"]]) - 1)

# Confidence interval for that percentage effect (coefci's default level),
# computed with the HC0 heteroskedasticity-robust covariance matrix
100 * (exp(coefci(m2, vcov. = vcovHC, type = "HC0")["foreign1", ]) - 1)


# =============================================================
# Model (3): W/o potential HQ site
# =============================================================

# Exact matching on 4-digit NACE code and firm
# Recode empty strings as NA. The replacement values must come from wohq itself:
# ifelse() recycles/truncates its `no` argument to the length of the test, so
# reading them from the longer df would assign each wohq row the NACE code of a
# positionally different df row.
wohq$NACE <- ifelse(wohq$NACE == "", NA, wohq$NACE)

# Matching on data without firms that include potential HQ site,
# restricted to rows with a non-missing NACE code
m <- matchit(
  foreign ~ NACE + BVD,
  data = wohq[!is.na(wohq$NACE), ],
  method = "exact"
)
df.match <- match.data(m)


# Re-run main model (with sector factor) on the matched sample, applying the
# matching weights returned by match.data()
m3 <- lm(
  logDV ~ foreign + logAlloc0 + logEmit0 + as.factor(iso2) +
    as.factor(ETSsector) + as.factor(BVD),
  data = df.match,
  weights = weights
)
summary(m3)

# Sample size, then number of sector, country and firm levels used in the fit
nobs(m3)
length(m3$xlevels[["as.factor(ETSsector)"]])
length(m3$xlevels[["as.factor(iso2)"]])
length(m3$xlevels[["as.factor(BVD)"]])

# Effect of foreign == 1 expressed in percent: 100 * (exp(beta) - 1)
100 * (exp(coef(summary(m3))[["foreign1", "Estimate"]]) - 1)

# Confidence interval for that percentage effect (coefci's default level),
# computed with the HC0 heteroskedasticity-robust covariance matrix
100 * (exp(coefci(m3, vcov. = vcovHC, type = "HC0")["foreign1", ]) - 1)



# Additional analysis that excludes individual plants (not whole firms) flagged as potential HQ sites

# =============================================================
# Model (1): W/o potential HQ site
# =============================================================

# Baseline specification excluding potential HQ plants.
# Rows with a missing HQ flag are filtered out explicitly (as in the matching
# step of this plant-level analysis): indexing with df$hq == 0 alone would turn
# every NA flag into an all-NA row that only lm's NA handling removes.
m1 <- lm(
  logDV ~ foreign + logAlloc0 + logEmit0 + as.factor(iso2) + as.factor(BVD),
  data = df[!is.na(df$hq) & df$hq == 0, ]
)
summary(m1)


# Sample size, then number of country and firm levels used in the fit
nobs(m1)
length(m1$xlevels[["as.factor(iso2)"]])
length(m1$xlevels[["as.factor(BVD)"]])

# Effect of foreign == 1 expressed in percent: 100 * (exp(beta) - 1)
100 * (exp(coef(summary(m1))[["foreign1", "Estimate"]]) - 1)

# Confidence interval for that percentage effect (coefci's default level),
# computed with the HC0 heteroskedasticity-robust covariance matrix
100 * (exp(coefci(m1, vcov. = vcovHC, type = "HC0")["foreign1", ]) - 1)


# =============================================================
# Model (2): W/o potential HQ site
# =============================================================

# Specification with additional sector (ETSsector) factor, excluding potential
# HQ plants.
# Rows with a missing HQ flag are filtered out explicitly (as in the matching
# step of this plant-level analysis): indexing with df$hq == 0 alone would turn
# every NA flag into an all-NA row that only lm's NA handling removes.
m2 <- lm(
  logDV ~ foreign + logAlloc0 + logEmit0 + as.factor(iso2) +
    as.factor(ETSsector) + as.factor(BVD),
  data = df[!is.na(df$hq) & df$hq == 0, ]
)

# Sample size, then number of sector, country and firm levels used in the fit
nobs(m2)
length(m2$xlevels[["as.factor(ETSsector)"]])
length(m2$xlevels[["as.factor(iso2)"]])
length(m2$xlevels[["as.factor(BVD)"]])

# Effect of foreign == 1 expressed in percent: 100 * (exp(beta) - 1)
100 * (exp(coef(summary(m2))[["foreign1", "Estimate"]]) - 1)

# Confidence interval for that percentage effect (coefci's default level),
# computed with the HC0 heteroskedasticity-robust covariance matrix
100 * (exp(coefci(m2, vcov. = vcovHC, type = "HC0")["foreign1", ]) - 1)


# =============================================================
# Model (3): W/o potential HQ site
# =============================================================

# Exact matching on 4-digit NACE code and firm
df$NACE[which(df$NACE == "")] <- NA # empty strings become NA

# Match on rows with a non-missing NACE code whose HQ flag is known and 0
m <- matchit(
  foreign ~ NACE + BVD,
  data = df[!is.na(df$NACE) & !is.na(df$hq) & df$hq == 0, ],
  method = "exact"
)
df.match <- match.data(m)


# Re-run main model (with sector factor) on the matched sample of non-HQ
# plants, applying the matching weights returned by match.data()
m3 <- lm(
  logDV ~ foreign + logAlloc0 + logEmit0 + as.factor(iso2) +
    as.factor(ETSsector) + as.factor(BVD),
  data = df.match[df.match$hq == 0, ],
  weights = weights
)
summary(m3)

# Sample size, then number of sector, country and firm levels used in the fit
nobs(m3)
length(m3$xlevels[["as.factor(ETSsector)"]])
length(m3$xlevels[["as.factor(iso2)"]])
length(m3$xlevels[["as.factor(BVD)"]])

# Effect of foreign == 1 expressed in percent: 100 * (exp(beta) - 1)
100 * (exp(coef(summary(m3))[["foreign1", "Estimate"]]) - 1)

# Confidence interval for that percentage effect (coefci's default level),
# computed with the HC0 heteroskedasticity-robust covariance matrix
100 * (exp(coefci(m3, vcov. = vcovHC, type = "HC0")["foreign1", ]) - 1)



# =============================================================
#                       END OF CODE
# =============================================================